---
title: "Replication code for Promoting Democracy under Electoral Authoritarianism: Evidence from Cambodia"
author: "Susan D. Hyde, Emily Lamb, and Oren Samet (special thanks to Alex Stephenson)"
date: '`r format(Sys.Date(), "%B %d, %Y")`'
output: html_document
---

## Required libraries 

```{r setup, include=T, message = F, warning = F}
packages <- c("estimatr", "ggthemes", "haven", "janitor", "RItools", "rstatix", "stargazer", "tidyverse", "xtable")

# Taken from https://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them 
requiredPackages <- function(x){
  for(i in x){
    if(!require(i, character.only = T)){
      install.packages(i, dependencies = T)
      require(i,  character.only = T)
    }
  }
}

requiredPackages(packages)
```

## Getting data  
```{r,warning=F, message=F}
rep_data <- haven::read_dta("/Users/orensamet/Dropbox/Cambodia (shared with Oren)/Alex RA work/replication_code/data/Cambodiaworkingdata.dta")

regression_data <- rep_data %>% 
  filter(followupcomplete == 1)

```

## Estimation 

Estimation Functions 

Tables 1-6 treatment effects are estimated using the methods described in Lin (2013) and implemented through the DeclareDesign project's estimatr package. 

This block provides wrappers to reduce the (unnecessary) duplication code of creating dependent and independent variables for the different specifications. Refer to estimatr::lm_lin() for additional information and documentation. In addition, there are wrappers for pulling out appropriate coefficient estimates to make the figures and an R equivalent to Stata's robust standard error linear model specification. 

```{r, warning =F, message = F}
# Helper functions for Lin Esimtation 
# Get appropriate formula for DVs
makeDVs <- function(vector){
  vector %>%
    map(~ formula(paste0("f_", ., " ~ townhall ")))
}

# Appropriate formula for IVs plus covariates for adjustment
makeIVs <- function(vector){
  vector %>% 
    # Covariates for adjustment. 
    map(~ formula(paste0("~ b_", ., " + b_DEMO1 +b_DEMO2B + b_DEMO3 +  b_DEMO5 + b_DEMO6 +b_DEMO7B + b_hhownindex + b_DEMO13 + factor(district)")))
}

# Wrapper for Lin Estimation 
get_Lin <- function(vector, rep_data){
  dvs <- makeDVs(vector)
  ivs <- makeIVs(vector)
  map2(dvs, ivs, ~ lm_lin(.x, data = rep_data, covariates = .y,
      clusters = commune,se_type = "stata")) %>%
    map_df(tidy)%>%
    # To make reporting easier 
    mutate(pv_round = round(p.value, digits = 3))
}

# Wrapper for lm_robust to replicate stata robust estimation
get_stata <- function(vector){
  vector %>% 
    map(~ formula(paste0("f_", ., " ~ townhall + ", "b_",.))) %>%
    map(~ lm_robust(., data = rep_data, clusters = commune, se_type = "stata",
    fixed_effects = district)) %>%
    map_df(tidy)
}

# Helper function to pull out coefficient information
get_coefs <- function(df, values){
  df %>% 
    # Grab only the appropriate outcome variable estimates 
    filter(outcome %in% values)%>%
    # get just the estimate for the treatment 
    filter(term =="townhall")%>%
    select(outcome, estimate, std.error, conf.low, conf.high)
}

adjust_pvalues <- function(df){
  p.values <- df %>% 
    filter(term == "townhall")%>%
    rstatix::adjust_pvalue(method = "BH")%>%
    mutate(adj.method = "BH")%>%
    select(term, outcome, p.value.adj, adj.method)
  out <- dplyr::left_join(df, p.values, by = c("term", "outcome"))%>%
    mutate(p.value = ifelse(!is.na(p.value.adj), p.value.adj, p.value))%>%
    select(-p.value.adj)
  out
}

table_output <- function(df, round_number){
  if("adj.method"%in% names(df)){
    out <- df %>% 
    filter(term %in% c("(Intercept)", "townhall"))%>%
          mutate(term = str_replace_all(term, "\\(Intercept\\)", "Constant"))%>%
    select(outcome, term, estimate, std.error, p.value, adj.method)%>%
    mutate_if(is.numeric, signif, round_number)%>%
      arrange(outcome, desc(term))
  return(out)
  }
  # Expects the output of get_lin()
  out <- df %>% 
    filter(term %in% c("(Intercept)", "townhall"))%>%
    mutate(term = str_replace_all(term, "\\(Intercept\\)", "Constant"))%>%
    select(outcome, term, estimate, std.error, p.value)%>%
    mutate_if(is.numeric, signif, round_number)%>%
      arrange(outcome, desc(term))
  return(out)
}
```

### Lin Estimators 

The following gives estimates for the Lin Estimator with CR2 errors by default. 

Running this will show a warning message that two coefficients are not defined because the design matrix is rank deficient. These two coefficients are always the the interaction of treatment with b_DEMO11_dum1_c	and b_DEMO11_dum3_c, which are captured entirely by b_DEMO11_dum_3_c. They do not affect the treatment estimate because they are automatically dropped by R. 

```{r, warning = F, message = F}
table2a_lin <- get_Lin(c("KPP1_response", "KPP2_response", "KPP3_response", "MNAcorrect",
    "KPP4_response", "KPP5_dummy_exclusive", "KPPindex1"), regression_data)

table2b_lin <- get_Lin(c("KPP6_1", "KPP6_2", "KPP6_3", "KPP6_4", "KPP6_5", "KPP6_6","KPPindex2"), regression_data)

table3_lin <- get_Lin(c("FP1_new", "FP2_new", "FP3_new", "FPindex_positiveonly"), regression_data)

table4a_lin <- get_Lin(c("EPP1", "EPP2"), regression_data)

table4b_lin <- get_Lin(c("EPP3_1", "EPP3_2", "EPP3_3", "EPP3_4", "EPP3_5", "EPP3_6","EPP3_7", "EPP3_8", "EPP3_9", "EPP3_10", "EPP3index"), regression_data)

table5_lin <- get_Lin(c("EPP4_1_ordinal", "EPP4_2_ordinal", "EPP4_3_ordinal","EPP4_4_ordinal", "EPP4_5_ordinal", "EPP4_6_ordinal", "EPP4_7_ordinal", "EPP4_8_ordinal", "EPP4_9_ordinal", "EPP4_10_ordinal", "EPP4_11_ordinal", "EPP4_12_ordinal","EPP4index_positiveonly"), regression_data)

table6_lin <- get_Lin(c("EPP5_1_ordinal", "EPP5_2_ordinal", "EPP5_3_ordinal", "EPP5_4_ordinal", "EPP5_5_ordinal", "EPP5_6_ordinal","EPP5_7_ordinal", "EPP5_8_ordinal", "EPP5index_positiveonly"), regression_data)

table7_lin <- get_Lin(c("IMP1", "IMP3_response", "IMPindex"), regression_data)

table8a_lin <- get_Lin(c("CP1_1_new", "CP1_2_new", "CP1_3_new", "CP1_4_new", "CPP1index_agreeonly"), regression_data)

table8b_lin <- get_Lin(c("CP2_1_new", "CP2_2_new", "CP2_3_new", "CP2_4_new", "CP2_5_new","CP2_6_new", "CPO2index_agreeonly"), regression_data)
```

### Show each table's estimate and intercept 

```{r}
stargazer::stargazer(table_output(table2a_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table2b_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table3_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table4a_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table4b_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table5_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table6_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table7_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table8a_lin,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table8b_lin,round_number = 2),type = "text", summary = F)
```

## Adjusted p-values 

Adjusted p-values are reported with the Benjamini & Hochberg correction. 

```{r}
table2a_lin.adjust <- adjust_pvalues(table2a_lin)
table2b_lin.adjust <- adjust_pvalues(table2b_lin)
table3_lin.adjust <- adjust_pvalues(table3_lin)
table4a_lin.adjust <- adjust_pvalues(table4a_lin)
table4b_lin.adjust <- adjust_pvalues(table4b_lin)
table5_lin.adjust <- adjust_pvalues(table5_lin)
table6_lin.adjust <- adjust_pvalues(table6_lin)
table7_lin.adjust <- adjust_pvalues(table7_lin)
table8a_lin.adjust <- adjust_pvalues(table8a_lin)
table8b_lin.adjust <- adjust_pvalues(table8b_lin)

### Index Correction 
indexes <- bind_rows(list(table2a_lin.adjust,
                               table2b_lin.adjust,
                               table3_lin.adjust,
                               table4a_lin.adjust,
                               table4b_lin.adjust,
                               table5_lin.adjust,
                               table6_lin.adjust,
                               table7_lin.adjust,
                               table8a_lin.adjust,
                          table8b_lin.adjust))%>%
  filter(grepl("index", outcome))

indexes.adjust <- adjust_pvalues(indexes)
```


### Report Adjusted p-values 
```{r}
stargazer::stargazer(table_output(table2a_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table2b_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table3_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table4a_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table4b_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table5_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table6_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table7_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table8a_lin.adjust,round_number = 2),type = "text", summary = F)
```

```{r}
stargazer::stargazer(table_output(table8b_lin.adjust,round_number = 2),type = "text", summary = F)
```
